# Session setup ----
# Use the CRAN cloud mirror so install.packages() never has to prompt for a
# mirror when the script runs non-interactively.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Emit a form-feed character; this conventionally clears the RStudio console.
cat("\014")
# NOTE: a former `rm(list = ls())` was removed here. It deleted every object in
# the caller's global environment whenever the script was source()d, and the
# script does not rely on an empty workspace: everything it uses is created
# below.
# Install plotly only when it is missing, instead of re-downloading it on every
# run of the script.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
##
## The downloaded binary packages are in
## /var/folders/06/y6vmvyfj0wg08vb3rszcfy080000gn/T//RtmpGyMWrR/downloaded_packages
# Load the checkout records directly from the hosted CSV (needs network access).
df <- read.csv(
  file = "https://seattle-library-checkout-data.s3.us-west-2.amazonaws.com/norton-anthology_spl-checkouts_2005-2025.csv"
)
library("tidyverse")
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("plotly")
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Authors ranked by total checkouts ----
# Sum Checkouts per Normalized.Creator (NA values ignored), sort the totals
# from largest to smallest, and keep the first ten rows.
top_authors <- df %>%
  group_by(Normalized.Creator) %>%
  summarize(TotalCheckouts = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(TotalCheckouts)) %>%
  head(n = 10)

# Print a heading followed by the ranked table.
print("Top 10 Most Checked Out Authors:")
## [1] "Top 10 Most Checked Out Authors:"
print(top_authors)
## # A tibble: 10 × 2
## Normalized.Creator TotalCheckouts
## <chr> <int>
## 1 Ursula K. Le Guin 73224
## 2 Octavia E. Butler 65386
## 3 Louise Erdrich 60846
## 4 N. K. Jemisin 59859
## 5 Toni Morrison 47501
## 6 Kurt Vonnegut 41462
## 7 George Saunders 38838
## 8 Philip K. Dick 38230
## 9 Sherman Alexie 37477
## 10 James Baldwin 32463
# Titles ranked by total checkouts ----
# Sum Checkouts per Normalized.Title (NA values ignored), sort the totals
# from largest to smallest, and keep the first ten rows.
top_titles <- df %>%
  group_by(Normalized.Title) %>%
  summarize(TotalCheckouts = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(TotalCheckouts)) %>%
  head(n = 10)

# Print a heading followed by the ranked table.
print("Top 10 Most Checked Out Titles:")
## [1] "Top 10 Most Checked Out Titles:"
print(top_titles)
## # A tibble: 10 × 2
## Normalized.Title TotalCheckouts
## <chr> <int>
## 1 Parable Of The Sower 21652
## 2 Lincoln In The Bardo 17356
## 3 The Fifth Season 17223
## 4 The Sympathizer 12792
## 5 Kindred 12591
## 6 Beloved 12330
## 7 The Left Hand Of Darkness 12148
## 8 The Absolutely True Diary Of A Part Time Indian 12043
## 9 The Year Of Magical Thinking 10907
## 10 The Sentence 10658
# Horizontal bar chart of the ten most checked-out authors.
# reorder() orders the author axis by TotalCheckouts; coord_flip() swaps the
# axes so the bars run horizontally.
ggplot(
  data = top_authors,
  mapping = aes(
    x = reorder(Normalized.Creator, TotalCheckouts),
    y = TotalCheckouts
  )
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Checked Out Authors",
    x = "Author",
    y = "Total Checkouts"
  ) +
  coord_flip()

# Horizontal bar chart of the ten most checked-out titles.
# reorder() orders the title axis by TotalCheckouts; coord_flip() swaps the
# axes so the bars run horizontally.
ggplot(
  data = top_titles,
  mapping = aes(
    x = reorder(Normalized.Title, TotalCheckouts),
    y = TotalCheckouts
  )
) +
  geom_col(fill = "darkgreen") +
  labs(
    title = "Top 10 Checked Out Titles",
    x = "Title",
    y = "Total Checkouts"
  ) +
  coord_flip()

# Monthly checkout series for the ten most checked-out authors ----

# Give every row a Date built from its year and month, pinned to day "01".
df$CheckoutDate <- as.Date(
  paste(df$CheckoutYear, df$CheckoutMonth, "01", sep = "-")
)

# Names of the ten creators with the largest checkout totals, largest first.
top10_authors <- df %>%
  group_by(Normalized.Creator) %>%
  summarize(TotalCheckouts = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(TotalCheckouts)) %>%
  head(n = 10) %>%
  pull(Normalized.Creator)

# Keep only the rows that belong to those ten creators.
top_authors_df <- filter(df, Normalized.Creator %in% top10_authors)

# One row per (month, creator) pair holding that month's checkout total.
author_ts_all <- top_authors_df %>%
  group_by(CheckoutDate, Normalized.Creator) %>%
  summarize(Checkouts = sum(Checkouts, na.rm = TRUE), .groups = "drop")

# Creator names sorted by overall total (descending); used as factor levels so
# the plot legend lists authors from most to least checked out.
author_order <- author_ts_all %>%
  group_by(Normalized.Creator) %>%
  summarize(Total = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(Total)) %>%
  pull(Normalized.Creator)

author_ts_all <- mutate(
  author_ts_all,
  Normalized.Creator = factor(Normalized.Creator, levels = author_order)
)
# Line chart of monthly checkouts, one coloured line per top-10 author.
# `linewidth` is used instead of `size`: ggplot2 3.4.0 deprecated `size` for
# line geoms, and the old call raised a deprecation warning on every fresh run.
authors <- ggplot(
  author_ts_all,
  aes(x = CheckoutDate, y = Checkouts, color = Normalized.Creator)
) +
  geom_line(linewidth = 1) +
  labs(
    title = "Monthly Checkouts for Top 10 Authors",
    x = "Date", y = "Checkouts", color = "Author(Descending)"
  ) +
  theme_minimal()
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(authors)
# Monthly checkout series for the ten most checked-out titles ----

# Give every row a Date built from its year and month, pinned to day "01".
df$CheckoutDate <- as.Date(
  paste(df$CheckoutYear, df$CheckoutMonth, "01", sep = "-")
)

# The ten titles with the largest checkout totals, largest first.
top_10_titles <- df %>%
  group_by(Normalized.Title) %>%
  summarize(TotalCheckouts = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(TotalCheckouts)) %>%
  head(n = 10) %>%
  pull(Normalized.Title)

# Keep only the rows that belong to those ten titles.
df_top_titles <- filter(df, Normalized.Title %in% top_10_titles)

# One row per (title, month) pair holding that month's checkout total.
title_time_series <- df_top_titles %>%
  group_by(Normalized.Title, CheckoutDate) %>%
  summarize(Checkouts = sum(Checkouts, na.rm = TRUE), .groups = "drop")

# Titles sorted by overall total (descending); used as factor levels so the
# plot legend lists titles from most to least checked out.
title_order <- title_time_series %>%
  group_by(Normalized.Title) %>%
  summarize(Total = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(Total)) %>%
  pull(Normalized.Title)

title_time_series <- mutate(
  title_time_series,
  Normalized.Title = factor(Normalized.Title, levels = title_order)
)
# Line chart of monthly checkouts, one coloured line per top-10 title.
# The plot is stored in `titles`; it was previously assigned to `authors`,
# which overwrote the authors chart and misnamed a chart of titles.
# `linewidth` is used instead of `size`, which ggplot2 3.4.0 deprecated for
# line geoms.
titles <- ggplot(
  title_time_series,
  aes(x = CheckoutDate, y = Checkouts, color = Normalized.Title)
) +
  geom_line(linewidth = 1) +
  labs(
    title = "Monthly Checkouts for Top 10 Titles",
    x = "Date", y = "Checkouts", color = "Title(Descending)"
  ) +
  theme_minimal() +
  theme(legend.position = "right") # default vertical legend
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(titles)
# Monthly checkouts for a single author: Octavia E. Butler ----
# Keep only this creator's rows and total the checkouts for each month.
butler_df <- df %>%
  filter(Normalized.Creator == "Octavia E. Butler") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))
# `linewidth` is used instead of `size`, which ggplot2 3.4.0 deprecated for
# line geoms.
butler <- ggplot(butler_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "steelblue", linewidth = 1) +
  labs(
    title = "Monthly Checkouts: Octavia E. Butler",
    x = "Date", y = "Checkouts"
  ) +
  theme_minimal()
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(butler)
# Monthly checkouts for a single title: Parable Of The Sower ----
# Keep only this title's rows and total the checkouts for each month.
parable_df <- df %>%
  filter(Normalized.Title == "Parable Of The Sower") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))
# Static ggplot line chart. `linewidth` is used instead of `size`, which
# ggplot2 3.4.0 deprecated for line geoms.
ggplot(parable_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "firebrick", linewidth = 1) +
  labs(
    title = "Monthly Checkouts: Parable Of The Sower",
    x = "Date", y = "Checkouts"
  ) +
  theme_minimal()

# Monthly checkouts for a single author: Mary Oliver ----
# Keep only this creator's rows and total the checkouts for each month.
oliver_df <- df %>%
  filter(Normalized.Creator == "Mary Oliver") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))
# Static ggplot line chart. `linewidth` is used instead of `size`, which
# ggplot2 3.4.0 deprecated for line geoms.
ggplot(oliver_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "darkgreen", linewidth = 1) +
  labs(
    title = "Monthly Checkouts: Mary Oliver",
    x = "Date", y = "Checkouts"
  ) +
  theme_minimal()
